home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
listings
/
v_09_05
/
9n05130a
< prev
next >
Wrap
Text File
|
1991-03-17
|
4KB
|
125 lines
/*
is_alike() Determins whether two character strings are
sufficiently equal using the 'Levenstein distance'
algorithm.
Limitation: For memory space conservation and execution speed
this implementation compares the first 20
characters of both string only. See l_distance.c()
listing.
'#define COMP_LEN' may be changed to decrease or
increase the number of characters compared.
Returns: The constant ACCEPTED or REJECTED.
De-commenting the following '#define DEBUG' will
create a stand-alone program, which enables you
to experiment with different values for the
constants 'addition', 'change' and 'deletion'.
*/
/* #define DEBUG */
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#ifdef DEBUG
#include <stdio.h>
#endif
int addition = 1; /* change constants in this block */
int change = 2; /* of four lines if needed. */
int deletion = 3;
#define COMP_LEN 20
#define TU(x) toupper(x)
#define ARR_SIZE COMP_LEN + 1
#define SMALLEST_OF(x,y,z) ( (x<y) ? min(x,z) : min(y,z) )
#define ZERO_IF_EQUAL(x,y) (TU(requested[x-1])==TU(found[y-1]) ? 0 : change)
#define ACCEPTED 1
#define REJECTED 0
int distance[ARR_SIZE][ARR_SIZE];
int is_alike(char *requested, char *found)
{
register int i,j;
int r_len, f_len, threshold;
/* first test: compare first character of both strings */
if (toupper(*requested) != toupper(*found)) {
#ifdef DEBUG
printf("\nRejected due to difference in first letters\n");
#endif
return(REJECTED); /* different first characters */
}
r_len = (strlen(requested)>COMP_LEN ? COMP_LEN : strlen(requested));
f_len = (strlen(found)>COMP_LEN ? COMP_LEN : strlen(found));
/* second test: comparing string length */
threshold = (int) floor( (double) 1 + ((r_len + 2) / 4));
if ( abs(r_len - f_len) > threshold) {
#ifdef DEBUG
printf("\nRejected due to large length difference\n");
#endif
return(REJECTED); /* length difference too big */
}
distance[0][0] = 0;
for (j = 1; j <= ARR_SIZE ; j++)
distance[0][j] = distance[0][j-1] + addition;
for (j = 1; j <= ARR_SIZE ; j++)
distance[j][0] = distance[j-1][0] + deletion;
for (i = 1; i <= r_len; i++)
for (j = 1; j <= f_len; j++)
distance[i][j] = SMALLEST_OF(
(distance[i-1][j-1] + ZERO_IF_EQUAL(i,j)),
(distance[i][j-1] + addition),
(distance[i-1][j] + deletion) );
#ifdef DEBUG
printf("\nis_alike(): threshold %d\n\n",threshold);
for (i=1; i <= r_len; i++) {
for (j=1; j <= f_len; j++)
printf(" %3d",distance[i][j]);
printf("\n");
}
printf("\nis_alike(): l_distance is %d\n",distance[r_len][f_len]);
#endif
/* third test: Levenstein distance within threshold limit ? */
if ( distance[r_len][f_len] <= threshold )
return(ACCEPTED); /* distance within limits */
else
return(REJECTED); /* distance too big */
}
#ifdef DEBUG
void usage()
{
printf("\nUsage: IA requested found\n");
printf("\n requested & found are the two strings to");
printf("\n be compared with the is_alike() function");
printf("\n containing the Levenstein distance algorithm.\n\n");
}
int main(int argc, char *argv[])
{
if (argc != 3) {
usage();
exit(1);
} else {
printf("\nMain(): Comparing '%s' and '%s':\n", argv[1], argv[2]);
if (is_alike(argv[1], argv[2]))
printf("\nMain(): Strings sufficiently alike...\n\n");
else
printf("\nMain(): Strings differing too much...\n\n");
}
return(0);
}
#endif